Preparing the data

library(readxl)
library(openxlsx)
stuns <- read_excel("data/2015 Reported Taser Data.xlsx", sheet=1)

stuns[1,] <- ifelse(is.na(stuns[1,]), colnames(stuns), stuns[1,])
colnames(stuns) <- stuns[1,]

stuns <- stuns[-1,]

colnames(stuns) <- make.names(colnames(stuns))

# another stream but for python data analysis going on right now:
# https://www.livecoding.tv/jakekara/

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Cleaning up the data

colnames(stuns) <- c("Law.Enforcement.Agency",
                     "Number.of.Incident.Reports",
                     "Incident.Case.Number",
                     "Date.of.Incident",
                     "Time.of.Incident",
                     "Sex",
                     "Race",
                     "Hispanic",
                     "Height",
                     "Weight",
                     "Age",
                     "Deployment.Type.I",
                     "Deployment.Type.II",
                     "Displays.or.Arc",
                     "Drive.Stun.Application",
                     "Activation.After.Probe.Contact",
                     "Number.of.Deployments",
                     "Warning.to.Subject",
                     "Subject.Injured",
                     "Subject.Not.Injured",
                     "Subject.Bruises",
                     "Subject.Abrasions",
                     "Subject.Breathing.Difficulty",
                     "Subject.Probe.Puncture.Only",
                     "Subject.Lost.Consciousness",
                     "Subject.Death",
                     "Subject.Other",
                     "Officer.Injured",
                     "Officer.Not.Injured",
                     "Officer.Bruises",
                     "Officer.Abrasions",
                     "Officer.Breathing.Difficulty",
                     "Officer.Probe.Puncture.Only",
                     "Officer.Lost.Consciousness",
                     "Officer.Death",
                     "Officer.Other",
                     "Location.Environment",
                     "Officer.s.Arrival",
                     "If.Other..Explain",
                     "Crime.in.Progress",
                     "Domestic.Disturbance",
                     "Disturbance..Other.",
                     "Traffic.Stop",
                     "Emotionally.Disturbed.Person",
                     "Suspicious.Person",
                     "Executing.Warrant",
                     "Under.Influence",
                     "Activity.Other",
                     "Non.aggressive",
                     "Previous.Hostility",
                     "Possibly.Intoxicated",
                     "Emotionally.Disturbed",
                     "Aggressive..Verbal.",
                     "Aggressive..Physical.",
                     "Armed.with",
                     "Officer.Perception.Other",
                     "Threat..Hostile",
                     "Dead.Weight..Non.Compliant",
                     "Fighting.Stance..Combative",
                     "Threaten.Use.of.Weapon",
                     "Fleeing",
                     "Unarmed.Assault",
                     "Armed.with.Firearm",
                     "Armed.with.Edged.Weapon",
                     "Armed.with.Blunt.Instrument",
                     "Armed.with.Other",
                     "Failed.to.Follow.Directions",
                     "Suicidal",
                     "Resistance.Other")

stuns$race_ethnicity <- ifelse(stuns$Hispanic==1, "Hispanic", stuns$Race)

Stun incidents by race in the state

by_state <- stuns %>%
  group_by(race_ethnicity) %>%
  summarise(total=n()) %>%
  mutate(percent=round(total/sum(total)*100,2))
  
library(knitr)
kable(by_state)
race_ethnicity total percent
Asian 2 0.33
Black 187 30.66
Hispanic 130 21.31
Unknown 1 0.16
White 290 47.54

Total stun incidents per department

library(tidyr)
library(DT)

by_dept_total <- stuns %>%
  group_by(Law.Enforcement.Agency, race_ethnicity) %>%
  summarise(total=n()) %>%
  spread(race_ethnicity, total)

datatable(by_dept_total)

Stun incidents per department by race

by_dept_percent <- stuns %>%
  group_by(Law.Enforcement.Agency, race_ethnicity) %>%
  summarise(total=n()) %>%
  mutate(percent=round(total/sum(total)*100,2)) %>%
  select(Law.Enforcement.Agency, race_ethnicity, percent) %>%
  spread(race_ethnicity, percent)

datatable(by_dept_percent)

Time of stun incidents

stuns$Time.of.Incident <- convertToDateTime(as.numeric(stuns$Time.of.Incident), origin = "2016-07-04")

library(lubridate)
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
stuns$hour <- hour(stuns$Time.of.Incident)

library(ggplot2)
library(extrafont)
## Registering fonts with R
library(ggalt)

ggplot(stuns, aes(hour)) + geom_histogram(binwidth=1)
## Warning: Removed 2 rows containing non-finite values (stat_bin).

# Time of stun incidents by race

ggplot(stuns, aes(hour, fill=race_ethnicity)) + geom_histogram(binwidth=1)
## Warning: Removed 2 rows containing non-finite values (stat_bin).

Month

stuns$Date.of.Incident <- as.POSIXct(as.numeric(stuns$Date.of.Incident) * (60*60*24)
                                     , origin="1899-12-30"
                                     , tz="GMT")

stuns$month <- month(stuns$Date.of.Incident, label=TRUE)

ggplot(stuns, aes(month)) + geom_bar()

ggplot(stuns, aes(month)) + geom_bar() + facet_grid(race_ethnicity ~.)

Day of the week

stuns$day <- wday(stuns$Date.of.Incident, label=TRUE)

ggplot(stuns, aes(day)) + geom_bar()

ggplot(stuns, aes(day)) + geom_bar() + facet_grid(race_ethnicity ~.)

Mapping total stun gun incidents

## Mapping

require(scales)
## Loading required package: scales
require(dplyr)
require(gtools)
## Loading required package: gtools
require(ggplot2)
require(rgdal)
## Loading required package: rgdal
## Warning: package 'rgdal' was built under R version 3.2.5
## Loading required package: sp
## rgdal: version: 1.1-10, (SVN revision 622)
##  Geospatial Data Abstraction Library extensions to R successfully loaded
##  Loaded GDAL runtime: GDAL 1.11.4, released 2016/01/25
##  Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/3.2/Resources/library/rgdal/gdal
##  Loaded PROJ.4 runtime: Rel. 4.9.1, 04 March 2015, [PJ_VERSION: 491]
##  Path to PROJ.4 shared files: /Library/Frameworks/R.framework/Versions/3.2/Resources/library/rgdal/proj
##  Linking to sp version: 1.2-3
require(ggmap)
## Loading required package: ggmap
require(Cairo)
## Loading required package: Cairo
require(gpclib)
## Loading required package: gpclib
## General Polygon Clipper Library for R (version 1.5-5)
##  Type 'class ? gpc.poly' for help
require(maptools)
## Loading required package: maptools
## Checking rgeos availability: TRUE
require(reshape)
## Loading required package: reshape
## 
## Attaching package: 'reshape'
## The following object is masked from 'package:lubridate':
## 
##     stamp
## The following object is masked from 'package:tidyr':
## 
##     expand
## The following object is masked from 'package:dplyr':
## 
##     rename
library(devtools)
## Warning: package 'devtools' was built under R version 3.2.5
library(stringr)
library(raster)
## 
## Attaching package: 'raster'
## The following object is masked from 'package:tidyr':
## 
##     extract
## The following object is masked from 'package:dplyr':
## 
##     select
library(sp)


by_dept_total[is.na(by_dept_total)] <-0 
by_dept_total$total <- by_dept_total$Asian + by_dept_total$Black + by_dept_total$Hispanic + by_dept_total$Unknown + by_dept_total$White
  
gpclibPermit()
## Warning in gpclibPermit(): support for gpclib will be withdrawn from
## maptools at the next major release
## [1] TRUE
gpclibPermitStatus()
## [1] TRUE
townborders <- readOGR(dsn="maps", layer="ctgeo")
## OGR data source with driver: ESRI Shapefile 
## Source: "maps", layer: "ctgeo"
## with 169 features
## It has 6 fields
townborders_only <- townborders
townborders<- fortify(townborders, region="NAME10")

names(by_dept_total)[names(by_dept_total) == 'Law.Enforcement.Agency'] <- 'id'


total_map <- left_join(townborders, by_dept_total)
## Joining by: "id"
tm_ct <- ggplot() +
  geom_polygon(data = total_map, aes(x=long, y=lat, group=group, fill=total), color = "black", size=0.2) +
  geom_polygon(data = total_map, aes(x=long, y=lat, group=group, fill=total), color = "black", size=0.2) +
  coord_map() +
  scale_fill_distiller(type="seq", trans="reverse", palette = "Reds", breaks=pretty_breaks(n=10)) +
  theme_nothing(legend=TRUE) +
  labs(title="Where people are tased (total)", fill="")

print(tm_ct)

Mapping percent by race

# White
names(by_dept_percent)[names(by_dept_percent) == 'Law.Enforcement.Agency'] <- 'id'


percent_map <- left_join(townborders, by_dept_percent)
## Joining by: "id"
tm_ct <- ggplot() +
  geom_polygon(data = percent_map, aes(x=long, y=lat, group=group, fill=White), color = "black", size=0.2) +
  geom_polygon(data = percent_map, aes(x=long, y=lat, group=group, fill=White), color = "black", size=0.2) +
  coord_map() +
  scale_fill_distiller(type="seq", trans="reverse", palette = "Reds", breaks=pretty_breaks(n=10)) +
  theme_nothing(legend=TRUE) +
  labs(title="Percent of white suspects tased", fill="")

print(tm_ct)

# Black

tm_ct <- ggplot() +
  geom_polygon(data = percent_map, aes(x=long, y=lat, group=group, fill=Black), color = "black", size=0.2) +
  geom_polygon(data = percent_map, aes(x=long, y=lat, group=group, fill=Black), color = "black", size=0.2) +
  coord_map() +
  scale_fill_distiller(type="seq", trans="reverse", palette = "Reds", breaks=pretty_breaks(n=10)) +
  theme_nothing(legend=TRUE) +
  labs(title="Percent of Black suspects tased", fill="")

print(tm_ct)

# Hispanic
tm_ct <- ggplot() +
  geom_polygon(data = percent_map, aes(x=long, y=lat, group=group, fill=Hispanic), color = "black", size=0.2) +
  geom_polygon(data = percent_map, aes(x=long, y=lat, group=group, fill=Hispanic), color = "black", size=0.2) +
  coord_map() +
  scale_fill_distiller(type="seq", trans="reverse", palette = "Reds", breaks=pretty_breaks(n=10)) +
  theme_nothing(legend=TRUE) +
  labs(title="Percent of Hispanic suspects tased", fill="")

print(tm_ct)

Gender breakdown

gender <- stuns %>%
  group_by(Sex) %>%
  summarise(Count=n()) %>%
  mutate(Percent=round(Count/sum(Count)*100,2))

kable(gender)
Sex Count Percent
Female 37 6.07
Male 573 93.93

Median age

# Median age

median(as.numeric(stuns$Age), na.rm=T)
## [1] 32

Age distribution

stuns$Age <- as.numeric(stuns$Age)

# Age distribution
ggplot(stuns, aes(Age)) + geom_histogram(binwidth=1)
## Warning: Removed 6 rows containing non-finite values (stat_bin).

ggplot(stuns, aes(Age)) + geom_histogram(binwidth=10)
## Warning: Removed 6 rows containing non-finite values (stat_bin).

stuns3 <- subset(stuns, Age < 100)
stuns3 <- subset(stuns3, race_ethnicity!="Asian")
stuns3 <- subset(stuns3, race_ethnicity!="Unknown")

gg <- ggplot(stuns3, aes(Age)) + geom_histogram(fill="#bf6151", binwidth=2) + facet_grid(race_ethnicity ~ .)
gg <- gg + labs(x="Age", y="Stuns deployed", title="Distribution of stuns by age",
                subtitle="",
                caption="SOURCE: CCSU Institute for Municipal and Regional Policy Management \nAndrew Ba Tran/TrendCT.org")
gg <- gg + theme_bw(base_family="Calibri")
gg <- gg + theme(text = element_text(size=20))
#gg <- gg + theme(panel.grid.major=element_blank())
#gg <- gg + theme(panel.grid.minor=element_blank())
gg <- gg + theme(panel.border=element_blank())
gg <- gg + theme(text = element_text(size=20))
#gg <- gg + theme(axis.ticks=element_blank())
gg <- gg +  theme(legend.position = "none")
#gg <- gg + theme(axis.text.x=element_blank())
gg <- gg + theme(plot.title=element_text(face="bold", family="Lato Black", size=22))
gg <- gg + theme(plot.subtitle=element_text(face="italic", size=15, margin=margin(b=12)))
gg <- gg + theme(plot.caption=element_text(size=12, margin=margin(t=12), color="#7a7d7e"))
gg

Where does this happen?

locations <- stuns %>%
  group_by(Location.Environment) %>%
  summarise(count=n()) %>%
  mutate(percent=round(count/sum(count)*100,2))

kable(locations)
Location.Environment count percent
Commercial Establishment 20 3.28
Educational Facility 7 1.15
Indoors-Private Property 16 2.62
Indoors-Public Building 44 7.21
Other Residence 31 5.08
Outdoors-Private Property 76 12.46
Outdoors-Public Area 241 39.51
Subject’s Residence 172 28.20
NA 3 0.49
gg <- ggplot(stuns3, aes(Location.Environment)) + facet_grid(race_ethnicity ~.) + geom_bar(fill="#bf6151") + coord_flip()
gg <- gg + labs(x=NULL, y="Stuns deployed", title="Distribution of stuns by location",
                subtitle="",
                caption="SOURCE: CCSU Institute for Municipal and Regional Policy Management \nAndrew Ba Tran/TrendCT.org")
gg <- gg + theme_bw(base_family="Calibri")
gg <- gg + theme(text = element_text(size=20))
#gg <- gg + theme(panel.grid.major=element_blank())
#gg <- gg + theme(panel.grid.minor=element_blank())
gg <- gg + theme(panel.border=element_blank())
gg <- gg + theme(text = element_text(size=20))
#gg <- gg + theme(axis.ticks=element_blank())
gg <- gg +  theme(legend.position = "none")
#gg <- gg + theme(axis.text.x=element_blank())
gg <- gg + theme(plot.title=element_text(face="bold", family="Lato Black", size=22, hjust=-1.1))
gg <- gg + theme(plot.subtitle=element_text(face="italic", size=15, margin=margin(b=12)))
gg <- gg + theme(plot.caption=element_text(size=12, margin=margin(t=12), color="#7a7d7e"))
gg
## Warning: Removed 3 rows containing non-finite values (stat_count).

What is the subject holding

stuns$Armed.with <- str_to_title(stuns$Armed.with)
weapon <- stuns %>%
  group_by(Armed.with) %>%
  summarise(count=n()) %>%
  arrange(-count)

weapon <- subset(weapon, !is.na(Armed.with))
kable(weapon)
Armed.with count
Knife 26
Baseball Bat 3
Broken Glass 3
Firearm 3
Hammer 3
Vehicle 3
Box Cutter 2
Possible Knife 2
1 1
12" Kitchen Knife 1
15" Wooden Stake 1
2 Knives Within Wingspan 1
Arrow 1
Butcher Knife 1
Chair 1
Facsimile Firearm 1
Glass Bottle, Possibly Sledgehammer 1
Handgun 1
Handgun Pointed (Later Determined Replica) 1
Heavy Metal Stanchion 1
Knife 1
Knife And Dagger 1
Knife Sharpening Steel 14" Long 1
Knife To His Neck 1
Knife, Possibly A Machete 1
Large Butcher Knife 1
Large K-Bar Knife 1
Large Kitchen Knife 1
Large Stick 1
Metal Cane 1
Metal Pipe 1
Object With Pointed Tip In Hand 1
Pistol 1
Pitbull 1
Pocket Knife 1
Possible Firearm 1
Possible Gun 1
Possible Weapon-History Of Weapons Charges 1
Possibly Long Rifle 1
Rifle 1
Sword 1
Threatened Use Of Knife To Kill 1
Unknown If Armed 1
Unknown Object In Hand 1
Utility Knife 1
Winged Cork Screw 1
Wooden Closet Rod 1

How often tased

stuns$Number.of.Deployments <- as.numeric(stuns$Number.of.Deployments)
## Warning: NAs introduced by coercion
deployed <- stuns %>%
  group_by(race_ethnicity) %>%
  summarise(mean(Number.of.Deployments, na.rm=T))

kable(deployed)
race_ethnicity mean(Number.of.Deployments, na.rm = T)
Asian 1.0000000
Black 1.1314286
Hispanic 1.1869919
Unknown 1.0000000
White 0.9037037

Doesn’t say much. Let’s try another factor

stuns$deployment <- ifelse(stuns$Number.of.Deployments>1, "multiple", "single")
stuns$deployment <- ifelse(stuns$Number.of.Deployments==0, "none", stuns$deployment)

deployed <- stuns %>%
  group_by(race_ethnicity, deployment) %>%
  summarise(count=n()) %>%
  mutate(percent=round(count/sum(count)*100,2)) 

deployed <- deployed[c("race_ethnicity", "deployment", "percent")]

deployed <- subset(deployed, !is.na(deployment))

deployed <- deployed %>%
  spread(deployment, percent)

kable(deployed)
race_ethnicity multiple none single
Asian NA NA 100.00
Black 19.79 19.79 54.01
Hispanic 27.69 33.08 33.85
Unknown NA NA 100.00
White 17.93 40.00 35.17

Weights

stuns$Weight <- as.numeric(stuns$Weight)
stuns2 <- subset(stuns, !is.na(Number.of.Deployments))

gg <- ggplot(stuns2, aes(factor(Number.of.Deployments), Weight)) 
gg <- gg + geom_boxplot((aes(fill=factor(Number.of.Deployments)))) 
gg <- gg + labs(x="Stuns deployed", y="Pounds", title="Distribution of number of stuns by suspect's weight",
                subtitle="The median number of times a person is stunned by police tends to trend with heaviness of suspect.\nNote: Sample size for 5-10 deployments is between 1 and 4 each— too small to draw conclusions.",
                caption="SOURCE: CCSU Institute for Municipal and Regional Policy Management \nAndrew Ba Tran/TrendCT.org")
gg <- gg + theme_bw(base_family="Calibri")
gg <- gg + theme(text = element_text(size=20))
#gg <- gg + theme(panel.grid.major=element_blank())
#gg <- gg + theme(panel.grid.minor=element_blank())
gg <- gg + theme(panel.border=element_blank())
gg <- gg + theme(text = element_text(size=20))
gg <- gg + theme(axis.ticks=element_blank())
gg <- gg +  theme(legend.position = "none")
#gg <- gg + theme(axis.text.x=element_blank())
gg <- gg + theme(plot.title=element_text(face="bold", family="Lato Black", size=22))
gg <- gg + theme(plot.subtitle=element_text(face="italic", size=15, margin=margin(b=12)))
gg <- gg + theme(plot.caption=element_text(size=12, margin=margin(t=12), color="#7a7d7e"))
gg

b2<-ggplot(stuns, aes(factor(Number.of.Deployments), Weight)) + 
  geom_jitter(alpha=I(1/4), aes(color=Number.of.Deployments)) +
  theme(legend.position = "none")

Heights

stuns$Height <- ifelse(stuns$Height=="6'", "6'0\"", stuns$Height)
stuns$Height <- ifelse(stuns$Height=="5'", "5'0\"", stuns$Height)

stuns$Height.inches <- sapply(strsplit(as.character(stuns$Height),"'|\""),
       function(x){12*as.numeric(x[1]) + as.numeric(x[2])})
stuns2 <- subset(stuns, !is.na(Number.of.Deployments))

gg <- ggplot(stuns2, aes(factor(Number.of.Deployments), Height.inches)) 
gg <- gg + geom_boxplot((aes(fill=factor(Number.of.Deployments)))) 
gg <- gg + labs(x="Stuns deployed", y="Inches", title="Distribution of number of stuns by suspect's height",
                subtitle="The median number of times a person is stunned by police tends to trend with heaviness of suspect.\nNote: Sample size for 5-10 deployments is between 1 and 4 each— too small to draw conclusions.",
                caption="SOURCE: CCSU Institute for Municipal and Regional Policy Management \nAndrew Ba Tran/TrendCT.org")
gg <- gg + theme_bw(base_family="Calibri")
gg <- gg + theme(text = element_text(size=20))
#gg <- gg + theme(panel.grid.major=element_blank())
#gg <- gg + theme(panel.grid.minor=element_blank())
gg <- gg + theme(panel.border=element_blank())
gg <- gg + theme(text = element_text(size=20))
gg <- gg + theme(axis.ticks=element_blank())
gg <- gg +  theme(legend.position = "none")
#gg <- gg + theme(axis.text.x=element_blank())
gg <- gg + theme(plot.title=element_text(face="bold", family="Lato Black", size=22))
gg <- gg + theme(plot.subtitle=element_text(face="italic", size=15, margin=margin(b=12)))
gg <- gg + theme(plot.caption=element_text(size=12, margin=margin(t=12), color="#7a7d7e"))
gg